function [beta,se,tstat,pval,CI,out] = regressHet(Y,X,option,labels,R,r,display,alpha,alphaWhite,alphaWald)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%% Code by: Brent Hickman, Washington University in St. Louis - Olin %%
%%%% Last updated: August 2020                                         %%
%%%% Email for comments/bugs: hickmanbr@gmail.com                      %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%% This function performs linear-in-parameters regression and inference under heteroskedasticity, 
%%%% and performs the simplified White test for heteroskedasticity.
%%%%
%%%% INPUT ARGUMENTS:
%%%%
%%%% Y is the dependent variable
%%%% X is the full matrix of independent variables
%%%% R and r are OPTIONAL and used to test linear restrictions of the form R*beta=r.  The user can
%%%%%%%% supress these arguments or only include the first three input arguments if no general 
%%%%%%%% linear restriction tests are desired.
%%%% OPTION is optional and indicates method.  When OPTION = 'robust' the program computes
%%%%%%%% heteroskedasticity-consistent variance-covariance matrix for parameters and performs a
%%%%%%%% heteroskedasticity-robust Wald test for the linear restrictions, if provided by the user.
%%%%%%%% When OPTION='FGLS' the program performs feasible GLS estimation and produces t-statistics
%%%%%%%% and F-tests for linear restrictions in the usual way (which is asymptotically valid with
%%%%%%%% GLS).  When OPTION ='FGLSrobust', then feasible GLS is performed, and
%%%%%%%% heteroskedasticity-consistent variance-covariance matrix for parameters is computed, along
%%%%%%%% with heteroskedastiticy-robust Wald test for the (optional) linear restrictions provided by
%%%%%%%% the user.
%%%% DISPLAY is optional and determines what information (if any) is displayed to the command
%%%%%%%% window.  If DISPLAY=1 (default) then the results of the simplified White test and a table
%%%%%%%% with the regression output are printed to the command window.  If DISPLAY~=1, output is
%%%%%%%% supressed.
%%%% ALPHA is optional and determines the significance level for individual t-tests.  The default
%%%%%%%% value is (ALPHA=0.05)
%%%% ALPHAWHITE is optional and determines the significance level for the simplified White test of 
%%%%%%%% heteroskedasticity (where the null is homoskedastic errors).  The default value is 
%%%%%%%% (ALPHAWHITE=0.05).
%%%% ALPHAWALD is optional and determines the significance level for Wald tests of linear 
%%%%%%%% restrictions R*beta = r.  The default value is (ALPHAWALD=0.05).
%%%% NOTE1: WHEN PROVIDING OPTIONAL INPUTS, ALL MUST BE INCLUDED IF EVEN ONE IS INCLUDED.  IN ORDER 
%%%%%%%% TO SUPRESS SOME INPUTS WHILE INCLUDING OTHERS, ENTER THEM AS EMPTY BRACKETS '[]'.
%%%% NOTE2: regressHet.m automatically drops observations with NaN's or inf's
%%%%
%%%% SYNTAX:
%%%% beta = regressHet(Y,X,option)  (required output argument)  OR
%%%% beta = regressHet(Y,X,option,labels,R,r,display,alpha,alphaWhite,alphaWald)  (required output argument)  OR
%%%% beta = regressHet(Y,X,option,[],[],[],[],[],[],[])  (required output argument)  OR
%%%% [beta,se,tstat,pval,CI,out] = regressHet(Y,X,option) (opt output args)
%%%% [beta,se,tstat,pval,CI,out] = regressHet(Y,X,option,labels,R,r,display,alpha,alphaWhite,alphaWald) (opt output args)
%%%% [beta,se,tstat,pval,CI,out] = regressHet(Y,X,option,[],[],[],[],[],[],[]) (opt output args)
%%%%
%%%% OUTPUT ARGUMENTS:
%%%%
%%%% BETA is the only required output.  It contains a column vector with the parameter estimates.
%%%% SE is a column vector the same size as BETA, with standard errors.
%%%% TSTAT is a column vector the same size as BETA, with t-statistics for a full set of individual
%%%%%%%% hypotheses that each parameter value is zero.
%%%% PVAL is a column vector the same size as BETA, with p-values for a full set of individual
%%%%%%%% hypotheses that each parameter value is zero.
%%%% CI is a column vector the same size as BETA, with p-values for a full set of individual
%%%%%%%% hypotheses that each parameter value is zero.
%%%% OUT is a structure containing the results of the simplified White test (out.White), the Wald
%%%%%%%% test for linear restrictions (out.Wald), and the table of regression output (out.results).
%%%%%%%% It also contains a field indicating the method used (out.method).

if nargin<4
    labels{length(X(1,:)),1} = [] ;
    for ii=1:length(X(1,:))
        labels{ii,1} = strcat('var',num2str(ii)) ;
    end
    R = [] ;
    r = [] ;
    display = 1 ;
    alpha = 0.05 ;
    alphaWhite = 0.05 ;
    alphaWald  = 0.05 ;
end
testR = size(R) ;
testr = size(r) ;
if testR(1)~=testr(1)
    error('regressHet:BadInput','The number of rows in R and r must be the same.')
end
if ~isempty(R) && testR(2)~=length(X(1,:))
    error('regressHet:BadInput','The number of columns in R and X must be the same.')
end

if isempty(display) 
    display = 1 ;
end
if isempty(alpha) 
    alpha = 0.05 ;
end
if isempty(alphaWhite)
    alphaWhite = 0.05 ;
end
if isempty(alphaWald)
    alphaWald = 0.05 ;
end

%%%%Drop NaN's and inf's:
dropindx = find( isnan(Y) | isinf(Y) | isnan(sum(X,2)) | isinf(sum(X,2)) ) ;
Y(dropindx)   = [] ; 
X(dropindx,:) = [] ;
N             = length(Y) ;
K             = length(X(1,:))-1 ;


[betaOLS,~,UOLS]    = regress(Y,X) ; 
beta                = betaOLS ;
U2OLS               = UOLS.^2 ;
[~,~,~,~,STATSaux1] = regress(U2OLS,[ones(size(Y)), Y, Y.^2]) ;
R2aux1              = STATSaux1(1) ;
[~,~,~,~,STATSaux2] = regress(U2OLS,ones(size(Y))) ;
R2aux2              = STATSaux2(1) ;
out.White.Fstat     = ( (R2aux1-R2aux2)/2 )/( (1-R2aux1)/(N-3) ) ;
out.White.pval      = fcdf(out.White.Fstat,2,N-3,'upper') ;
out.White.df        = [2,N-3] ;
out.White.alpha     = alphaWhite ;
if out.White.pval<alphaWhite
    out.White.result = strcat('REJECT null hypothesis H0: homoskedastic errors at the_',num2str(alphaWhite),' level.') ;
else
    out.White.result = strcat('FAIL TO REJECT null hypothesis H0: homoskedastic errors at the_',num2str(alphaWhite),' level.') ;
end

if strcmpi(option,'robust')
    Vhat       = diag(UOLS.^2) ;
    VarCovHC   = ((X'*X)\(X'*Vhat*X))/(X'*X) ;
    VarCovHCI  = (N/(N-K-1))*VarCovHC ;
    se         = sqrt(diag(VarCovHCI)) ;
    tstat      = beta./se ;
    pval       = nan(size(beta)) ;
    CI         = nan(length(beta),2) ;
    CV         = tinv(1-alpha/2,N-K-1) ;
    out.method = option ;
    for ii=1:K+1
        pval(ii) = 2*tcdf(abs(tstat(ii)),N-K-1,'upper') ;
        CI(ii,1) = beta(ii) - CV*se(ii) ;
        CI(ii,2) = beta(ii) + CV*se(ii) ;
    end
    if ~isempty(R) && ~isempty(r)  %%%%This means we need to form the Het-robust Wald F-test
        q              = length(r) ;
        out.Wald.df    = [q N-K-1] ;
        out.Wald.Fstat = (((R*beta-r)'/(R*VarCovHCI*R'))*(R*beta-r))/q ;
        out.Wald.pval  = fcdf(out.Wald.Fstat,q,N-K-1,'upper') ;
        out.Wald.alpha = alphaWald ;
        if out.Wald.pval<alphaWald
            out.Wald.result = strcat('REJECT null hypothesis H0: R*beta=r at the_',num2str(alphaWald),' level.') ;
        else
            out.Wald.result = strcat('FAIL TO REJECT null hypothesis H0: R*beta=r at the_',num2str(alphaWald),' level.') ;
        end
        temp = 'H0 for Het-Robust Wald test involved:' ;
        firstdone = 0 ;
        for ii=1:length(X(1,:))
            if sum(abs(R(:,ii)))~=0 %%%%This means the variable in the iith column of X was part of the null hypothesis
                if firstdone==0
                    temp = [temp,' ',labels{ii,1}] ;  %#ok
                    firstdone = 1 ; 
                else
                    temp = [temp,', ',labels{ii,1}] ;  %#ok
                end
            end
        end
        out.Wald.varlist = temp ;
    end
    Variable        = labels ;
    Variable{end+1} = 'R^2' ;
    Variable{end+1} = 'N' ;
    R2temp          = 1 - sum(U2OLS)/sum((Y-mean(Y)).^2) ;
    R2Adjtemp       = 1 - (sum(U2OLS)/(N-K-1))/(sum((Y-mean(Y)).^2)/(N-1)) ;
    coeffs          = [beta;R2temp;N] ;
    StdErr          = [se;R2Adjtemp;NaN] ;
    T               = [tstat;NaN;NaN] ;
    Pval            = [pval;NaN;NaN] ;
    ConfInt         = [CI;NaN, NaN;NaN, NaN] ;
    out.results = table(Variable,coeffs,StdErr,T,Pval,ConfInt) ;
elseif strcmpi(option,'FGLS')
    logU2       = log(UOLS.^2) ;
    [~,~,resid] = regress(logU2,X) ;
    logh        = logU2 - resid ;
    h_hat       = exp(logh) ;
    Ystar       = nan(size(Y)) ;
    Xstar       = nan(size(X)) ;
    for ii=1:length(Ystar)
        Ystar(ii)   = Y(ii)/sqrt(h_hat(ii)) ;
        Xstar(ii,:) = X(ii,:)*(1/sqrt(h_hat(ii))) ;
    end
    beta        = (Xstar'*Xstar)\Xstar'*Ystar ;
    U_FGLS      = Ystar - Xstar*beta ;
    sigma2_FGLS = sum(U_FGLS.^2)/(N-K-1) ;
    VarCov_FGLS = sigma2_FGLS*eye(length(beta))/(Xstar'*Xstar) ;
    se          = sqrt(diag(VarCov_FGLS)) ;
    tstat       = beta./se ;
    pval        = nan(size(beta)) ;
    CI          = nan(length(beta),2) ;
    CV          = tinv(1-alpha/2,N-K-1) ;
    out.method  = option ;
    for ii=1:K+1
        pval(ii) = 2*tcdf(abs(tstat(ii)),N-K-1,'upper') ;
        CI(ii,1) = beta(ii) - CV*se(ii) ;
        CI(ii,2) = beta(ii) + CV*se(ii) ;
    end
    if ~isempty(R) && ~isempty(r)  %%%%This means we need to form the Het-robust Wald F-test
        q              = length(r) ;
        out.Wald.df    = [q N-K-1] ;
        out.Wald.Fstat = (((R*beta-r)'/(R*VarCov_FGLS*R'))*(R*beta-r))/q ;
        out.Wald.pval  = fcdf(out.Wald.Fstat,q,N-K-1,'upper') ;
        out.Wald.alpha = alphaWald ;
        if out.Wald.pval<alphaWald
            out.Wald.result = strcat('REJECT null hypothesis H0: R*beta=r at the_',num2str(alphaWald),' level.') ;
        else
            out.Wald.result = strcat('FAIL TO REJECT null hypothesis H0: R*beta=r at the_',num2str(alphaWald),' level.') ;
        end
        temp = 'H0 for Het-Robust Wald test involved:' ;
        firstdone = 0 ;
        for ii=1:length(X(1,:))
            if sum(abs(R(:,ii)))~=0 %%%%This means the variable in the iith column of X was part of the null hypothesis
                if firstdone==0
                    temp = [temp,' ',labels{ii,1}] ;  %#ok
                    firstdone = 1 ; 
                else
                    temp = [temp,', ',labels{ii,1}] ;  %#ok
                end
            end
        end
        out.Wald.varlist = temp ;
    end
    Variable        = labels ;
    Variable{end+1} = 'R^2' ;
    Variable{end+1} = 'N' ;
    R2temp          = 1 - sum(U2OLS)/sum((Y-mean(Y)).^2) ;
    R2Adjtemp       = 1 - (sum(U2OLS)/(N-K-1))/(sum((Y-mean(Y)).^2)/(N-1)) ;
    coeffs          = [beta;R2temp;N] ;
    StdErr          = [se;R2Adjtemp;NaN] ;
    T               = [tstat;NaN;NaN] ;
    Pval            = [pval;NaN;NaN] ;
    ConfInt         = [CI;NaN, NaN;NaN, NaN] ;
    out.results = table(Variable,coeffs,StdErr,T,Pval,ConfInt) ;
elseif strcmpi(option,'FGLSrobust')
    logU2       = log(UOLS.^2) ;
    [~,~,resid] = regress(logU2,X) ;
    logh        = logU2 - resid ;
    h_hat       = exp(logh) ;
    Ystar       = nan(size(Y)) ;
    Xstar       = nan(size(X)) ;
    for ii=1:length(Ystar)
        Ystar(ii)   = Y(ii)/sqrt(h_hat(ii)) ;
        Xstar(ii,:) = X(ii,:)*(1/sqrt(h_hat(ii))) ;
    end
    beta       = (Xstar'*Xstar)\Xstar'*Ystar ;
    U_FGLS     = Ystar - Xstar*beta ;
    Vhat       = diag(U_FGLS.^2) ;
    VarCovHC   = ((Xstar'*Xstar)\(Xstar'*Vhat*Xstar))/(Xstar'*Xstar) ;
    VarCovHCI  = (N/(N-K-1))*VarCovHC ;
    se         = sqrt(diag(VarCovHCI)) ;
    tstat      = beta./se ;
    pval       = nan(size(beta)) ;
    CI         = nan(length(beta),2) ;
    CV         = tinv(1-alpha/2,N-K-1) ;
    out.method = option ;
    for ii=1:K+1
        pval(ii) = 2*tcdf(abs(tstat(ii)),N-K-1,'upper') ;
        CI(ii,1) = beta(ii) - CV*se(ii) ;
        CI(ii,2) = beta(ii) + CV*se(ii) ;
    end
    if ~isempty(R) && ~isempty(r)  %%%%This means we need to form the Het-robust Wald F-test
        q              = length(r) ;
        out.Wald.df    = [q N-K-1] ;
        out.Wald.Fstat = (((R*beta-r)'/(R*VarCovHCI*R'))*(R*beta-r))/q ;
        out.Wald.pval  = fcdf(out.Wald.Fstat,q,N-K-1,'upper') ;
        out.Wald.alpha = alphaWald ;
        if out.Wald.pval<alphaWald
            out.Wald.result = strcat('REJECT null hypothesis H0: R*beta=r at the_',num2str(alphaWald),' level.') ;
        else
            out.Wald.result  = strcat('FAIL TO REJECT null hypothesis H0: R*beta=r at the_',num2str(alphaWald),' level.') ;
        end
        temp = 'H0 for Het-Robust Wald test involved:' ;
        firstdone = 0 ;
        for ii=1:length(X(1,:))
            if sum(abs(R(:,ii)))~=0 %%%%This means the variable in the iith column of X was part of the null hypothesis
                if firstdone==0
                    temp = [temp,' ',labels{ii,1}] ;  %#ok
                    firstdone = 1 ; 
                else
                    temp = [temp,', ',labels{ii,1}] ;  %#ok
                end
            end
        end
        out.Wald.varlist = temp ;
    end
    Variable        = labels ;
    Variable{end+1} = 'R^2' ;
    Variable{end+1} = 'N' ;
    R2temp          = 1 - sum(U2OLS)/sum((Y-mean(Y)).^2) ;
    R2Adjtemp       = 1 - (sum(U2OLS)/(N-K-1))/(sum((Y-mean(Y)).^2)/(N-1)) ;
    coeffs          = [beta;R2temp;N] ;
    StdErr          = [se;R2Adjtemp;NaN] ;
    T               = [tstat;NaN;NaN] ;
    Pval            = [pval;NaN;NaN] ;
    ConfInt         = [CI;NaN, NaN;NaN, NaN] ;
    out.results = table(Variable,coeffs,StdErr,T,Pval,ConfInt) ;
else
    error('regressHet:BadInput','There are three values for OPTION: ''robust'', ''FGLS'', and ''FGLSrobust''')
end


if display==1
    disp('-----------------------------------------------------------------------------------')
    disp('----------------  SIMPLIFIED WHITE TEST FOR HETEROSKEDASTICITY  -------------------')
    disp('-----------------------------------------------------------------------------------')
    Fstat  = out.White.Fstat %#ok
    df     = out.White.df %#ok
    pval   = out.White.pval %#ok
    result = out.White.result %#ok
    if strcmpi(option,'robust')
        disp('-----------------------------------------------------------------------------------')
        disp('--------------  OLS W/HETEROSKEDASTICITY-CONSISTENT STANDARD ERRORS  --------------')
        disp('-----------------------------------------------------------------------------------')
    elseif strcmpi(option,'FGLS')
        disp('-----------------------------------------------------------------------------------')
        disp('------------------------------  FEASIBLE GLS  -------------------------------------')
        disp('-----------------------------------------------------------------------------------')
    elseif strcmpi(option,'FGLSrobust')
        disp('-----------------------------------------------------------------------------------')
        disp('---------  FEASIBLE GLS W/HETEROSKEDASTICITY-CONSISTENT STANDARD ERRORS  ----------')
        disp('-----------------------------------------------------------------------------------')
    end
    results = out.results %#ok
    if ~isempty(R) && ~isempty(r)
        disp('-----------------------------------------------------------------------------------')
        disp('-------------------  HET-ROBUST WALD TEST FOR R*beta=r  ---------------------------')
        disp('-----------------------------------------------------------------------------------')
        Fstat   = out.Wald.Fstat %#ok
        df      = out.Wald.df  %#ok
        pval    = out.Wald.pval %#ok
        result  = out.Wald.result %#ok
        varlist = out.Wald.varlist %#ok
    end
end



end